LOAD PACKAGES

library(tidyverse)
library(GGally)
library(psych)
library(stats)
library(corrplot)

# Fix the random seed so any stochastic step later in the notebook repeats.
set.seed(seed = 210904)

# Read the training data into `raw`; every later chunk builds on this frame.
raw <- read.csv(file = "./data/moneyball-training-data.csv")

TIDY DATA

# Keep only rows with a positive pitching-strikeout count (the comparison also
# drops rows where TEAM_PITCHING_SO is NA).
raw <- raw %>%
  filter(TEAM_PITCHING_SO > 0)

# Label each row 'a'..'e' by how its batting strikeouts compare with its
# pitching strikeouts.
# case_when() uses the first branch that matches, so each branch only states
# its lower bound; the upper bound is implied by the branches above it.
# Rows that sit exactly on a cut line (e.g. batting SO == 0.75 * pitching SO)
# previously matched no branch and became NA; they now fall into the band
# below the line. Every row that already had a label keeps the same label.
# Rows where TEAM_BATTING_SO is NA still get NA.
raw <- raw %>%
  mutate(
    SO_factor = case_when(
      TEAM_BATTING_SO >= TEAM_PITCHING_SO * .96 + 10 ~ "a",
      TEAM_BATTING_SO > TEAM_PITCHING_SO * .96 - 50 ~ "b",
      TEAM_BATTING_SO > TEAM_PITCHING_SO * .75 ~ "c",
      TEAM_BATTING_SO > TEAM_PITCHING_SO * .55 ~ "d",
      TEAM_BATTING_SO <= TEAM_PITCHING_SO * .55 ~ "e"
    )
  )
# Pitching vs. batting strikeouts, coloured by SO_factor, for rows with fewer
# than 2000 pitching strikeouts. The reference line is y = 0.55 * x.
raw %>%
  filter(TEAM_PITCHING_SO < 2000) %>%
  ggplot(aes(x = TEAM_PITCHING_SO, y = TEAM_BATTING_SO, colour = SO_factor)) +
  geom_point() +
  geom_abline(slope = .55, intercept = 0)

# Remove every 'TEAM_' occurrence from the column names of `raw`.
names(raw) <- gsub(pattern = 'TEAM_', replacement = '', x = names(raw))

After grouping the data along these parameters, we can see that the fitted slopes and intercepts differ from one category to the next.

# Add four derived columns to `raw` in a single mutate() call:
#   BASERUN_NET_SB - stolen bases minus caught stealing
#   OFFENSE_OBP    - (hits + walks) / (hits + walks + 162 * 27), batting side
#   DEFENSE_OBP    - same ratio computed from the pitching columns
#   TOT_AT_BATS    - hits + walks
# NOTE(review): 162 * 27 presumably stands for games x outs per game - confirm.
raw <- mutate(
  raw,
  BASERUN_NET_SB = BASERUN_SB - BASERUN_CS,
  OFFENSE_OBP = (BATTING_H + BATTING_BB) / (BATTING_H + BATTING_BB + (162 * 27)),
  DEFENSE_OBP = (PITCHING_H + PITCHING_BB) / (PITCHING_H + PITCHING_BB + 162 * 27),
  TOT_AT_BATS = BATTING_H + BATTING_BB
)
# Wins against hits, one straight-line fit per SO_factor group (no error band).
ggplot(data = raw, mapping = aes(x = BATTING_H, y = TARGET_WINS, color = SO_factor)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE)
`geom_smooth()` using formula 'y ~ x'

# TOTAL_BASES is hits + walks (the same formula as TOT_AT_BATS).
raw <- mutate(raw, TOTAL_BASES = BATTING_H + BATTING_BB)

# Working copy without the hit-by-pitch and pitching columns.
df_temp <- select(
  raw,
  -c(BATTING_HBP, PITCHING_SO, PITCHING_H, PITCHING_HR, PITCHING_BB)
)


#ggpairs(df_temp, mapping=aes(color=SO_factor))

GENERAL MODEL

# General model: regress TARGET_WINS on every column left after dropping the
# identifier, the grouping label and the listed predictors.
raw_select <- select(
  raw,
  -c(
    INDEX, SO_factor, TOT_AT_BATS, TOTAL_BASES, BASERUN_NET_SB, PITCHING_HR,
    BATTING_H, BATTING_2B, BATTING_SO, PITCHING_H, BATTING_3B, BASERUN_SB,
    BATTING_HBP, BATTING_BB, DEFENSE_OBP
  )
)
#raw_select<-raw_select%>%mutate(OFFENSE_OBP=log(OFFENSE_OBP))
lm.raw <- lm(TARGET_WINS ~ ., data = raw_select)
summary(lm.raw)

Call:
lm(formula = TARGET_WINS ~ ., data = raw_select)

Residuals:
    Min      1Q  Median      3Q     Max 
-32.362  -6.789  -0.145   6.533  29.838 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  45.606548   8.551093   5.333 1.11e-07 ***
BATTING_HR    0.079112   0.009140   8.656  < 2e-16 ***
BASERUN_CS    0.119622   0.013601   8.795  < 2e-16 ***
PITCHING_BB   0.019523   0.003894   5.013 5.99e-07 ***
PITCHING_SO  -0.025115   0.002085 -12.047  < 2e-16 ***
FIELDING_E   -0.131117   0.009354 -14.018  < 2e-16 ***
FIELDING_DP  -0.113632   0.013526  -8.401  < 2e-16 ***
OFFENSE_OBP 210.649330  28.616636   7.361 3.02e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 9.989 on 1478 degrees of freedom
  (668 observations deleted due to missingness)
Multiple R-squared:  0.3837,    Adjusted R-squared:  0.3807 
F-statistic: 131.4 on 7 and 1478 DF,  p-value: < 2.2e-16
# ggResidpanel supplies resid_panel() and resid_interact(), which this file
# uses for the residual diagnostics of each model.
library(ggResidpanel)
# Residual diagnostic panel for the general model.
resid_panel(lm.raw)

Models by SO_factor

group a

# Rows labelled SO_factor 'a'.
df_a <- filter(raw, SO_factor == 'a')

linear model

# Group a model: TARGET_WINS on the columns that remain after the exclusions
# (BATTING_HR, BATTING_SO, BATTING_HBP, FIELDING_E, DEFENSE_OBP).
df_a_select <- select(
  df_a,
  -c(
    INDEX, SO_factor, PITCHING_HR, PITCHING_SO, BASERUN_CS, BATTING_2B,
    PITCHING_H, PITCHING_BB, BASERUN_SB, BATTING_3B, TOT_AT_BATS, BATTING_BB,
    BATTING_H, TOTAL_BASES, OFFENSE_OBP, BASERUN_NET_SB, FIELDING_DP
  )
)
lm.a <- lm(TARGET_WINS ~ ., data = df_a_select)
summary(lm.a)

Call:
lm(formula = TARGET_WINS ~ ., data = df_a_select)

Residuals:
     Min       1Q   Median       3Q      Max 
-19.9988  -6.2164  -0.2307   6.4255  21.1742 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -24.890319  21.178611  -1.175  0.24140    
BATTING_HR    0.085128   0.024692   3.448  0.00070 ***
BATTING_SO   -0.021945   0.006777  -3.238  0.00143 ** 
BATTING_HBP   0.089868   0.050191   1.791  0.07501 .  
FIELDING_E   -0.179445   0.041016  -4.375 2.03e-05 ***
DEFENSE_OBP 403.719475  64.224413   6.286 2.28e-09 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.729 on 185 degrees of freedom
  (772 observations deleted due to missingness)
Multiple R-squared:  0.4945,    Adjusted R-squared:  0.4809 
F-statistic:  36.2 on 5 and 185 DF,  p-value: < 2.2e-16
# Interactive residual diagnostics for the group a model, all plot types.
resid_interact(lm.a,plots = 'all')
Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]) :
  geom_GeomFunction() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issues

I really want to keep BATTING_HBP in here: it accounts for a large share of R^2, but its p-value is a little too high. What does this mean?

group b

# Rows labelled SO_factor 'b'.
df_b <- filter(raw, SO_factor == 'b')

linear model

# Group b model: TARGET_WINS on BATTING_3B, BATTING_HR, BASERUN_CS, FIELDING_E
# and DEFENSE_OBP.
# INDEX is used elsewhere in this file as a row identifier and is excluded
# from the general, group a and group c models. It was missing from this
# exclusion list, so lm() fitted it as a predictor; it is now dropped here too.
# Names that were repeated in the exclusion list have been removed.
df_b_select <- df_b %>%
  select(-c(
    INDEX, SO_factor, BATTING_HBP, TOT_AT_BATS, TOTAL_BASES, BASERUN_NET_SB,
    PITCHING_SO, PITCHING_BB, BATTING_SO, BASERUN_SB, BATTING_BB, PITCHING_HR,
    FIELDING_DP, BATTING_2B, OFFENSE_OBP, PITCHING_H, BATTING_H
  ))
lm.b <- lm(TARGET_WINS ~ ., data = df_b_select)
summary(lm.b)

Call:
lm(formula = TARGET_WINS ~ ., data = df_b_select)

Residuals:
     Min       1Q   Median       3Q      Max 
-26.3542  -6.7592   0.1075   6.7337  30.1801 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.526e+01  1.058e+01  -3.332 0.000934 ***
INDEX        1.741e-03  8.043e-04   2.164 0.030969 *  
BATTING_3B   1.635e-01  3.476e-02   4.705 3.41e-06 ***
BATTING_HR   1.035e-01  1.424e-02   7.271 1.65e-12 ***
BASERUN_CS   2.397e-01  2.152e-02  11.136  < 2e-16 ***
FIELDING_E  -1.859e-01  1.588e-02 -11.703  < 2e-16 ***
DEFENSE_OBP  3.450e+02  3.312e+01  10.419  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 10.24 on 439 degrees of freedom
  (320 observations deleted due to missingness)
Multiple R-squared:  0.5328,    Adjusted R-squared:  0.5264 
F-statistic: 83.43 on 6 and 439 DF,  p-value: < 2.2e-16
# Residual diagnostic panel for the group b model, all plot types.
resid_panel(lm.b,plots = 'all')

group c

# Rows labelled SO_factor 'c', then a scatter of wins against home runs.
df_c <- filter(raw, SO_factor == 'c')
ggplot(data = df_c, mapping = aes(y = TARGET_WINS, x = BATTING_HR)) +
  geom_point()

splitting df_c by batting HR

# Split group c on home runs: df_c_a holds the high-HR rows, df_c_b the rest.
# NOTE(review): rows with BATTING_HR from 100 to 110 (and NA) land in neither
# subset - confirm this gap is intended.
df_c_a <- filter(df_c, BATTING_HR > 110)
df_c_b <- filter(df_c, BATTING_HR < 100)

IF YOU SPLIT OUT THESE 28 POINTS DF_C COMPARES WELL

linear model

# Group c (low-HR subset) model: TARGET_WINS on BASERUN_SB, PITCHING_BB,
# PITCHING_SO, FIELDING_E and OFFENSE_OBP.
df_c_b_select <- select(
  df_c_b,
  -c(
    INDEX, SO_factor, BATTING_HBP, BASERUN_NET_SB, FIELDING_DP, BASERUN_CS,
    BATTING_H, BATTING_2B, BATTING_BB, TOT_AT_BATS, TOTAL_BASES, DEFENSE_OBP,
    BATTING_SO, BATTING_HR, PITCHING_H, BATTING_3B, PITCHING_HR
  )
)
lm.c_b <- lm(TARGET_WINS ~ ., data = df_c_b_select)
summary(lm.c_b)

Call:
lm(formula = TARGET_WINS ~ ., data = df_c_b_select)

Residuals:
    Min      1Q  Median      3Q     Max 
-33.501  -8.359   0.103   9.300  35.638 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -90.866181  22.527979  -4.033 7.90e-05 ***
BASERUN_SB    0.080304   0.010027   8.009 1.05e-13 ***
PITCHING_BB  -0.031562   0.010660  -2.961  0.00345 ** 
PITCHING_SO   0.047340   0.008932   5.300 3.13e-07 ***
FIELDING_E   -0.090238   0.008789 -10.267  < 2e-16 ***
OFFENSE_OBP 568.806989  75.208506   7.563 1.54e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 13.09 on 194 degrees of freedom
Multiple R-squared:  0.5114,    Adjusted R-squared:  0.4988 
F-statistic: 40.61 on 5 and 194 DF,  p-value: < 2.2e-16
# Pairwise plot matrix of the response and predictors used in lm.c_b.
ggpairs(df_c_b_select)

 plot: [1,1] [==>-------------------------------------------------------------------------------------------------------------------]  3% est: 0s 
 plot: [1,2] [======>---------------------------------------------------------------------------------------------------------------]  6% est: 1s 
 plot: [1,3] [=========>------------------------------------------------------------------------------------------------------------]  8% est: 1s 
 plot: [1,4] [============>---------------------------------------------------------------------------------------------------------] 11% est: 1s 
 plot: [1,5] [===============>------------------------------------------------------------------------------------------------------] 14% est: 1s 
 plot: [1,6] [===================>--------------------------------------------------------------------------------------------------] 17% est: 1s 
 plot: [2,1] [======================>-----------------------------------------------------------------------------------------------] 19% est: 1s 
 plot: [2,2] [=========================>--------------------------------------------------------------------------------------------] 22% est: 1s 
 plot: [2,3] [=============================>----------------------------------------------------------------------------------------] 25% est: 1s 
 plot: [2,4] [================================>-------------------------------------------------------------------------------------] 28% est: 1s 
 plot: [2,5] [===================================>----------------------------------------------------------------------------------] 31% est: 1s 
 plot: [2,6] [======================================>-------------------------------------------------------------------------------] 33% est: 1s 
 plot: [3,1] [==========================================>---------------------------------------------------------------------------] 36% est: 1s 
 plot: [3,2] [=============================================>------------------------------------------------------------------------] 39% est: 1s 
 plot: [3,3] [================================================>---------------------------------------------------------------------] 42% est: 1s 
 plot: [3,4] [===================================================>------------------------------------------------------------------] 44% est: 1s 
 plot: [3,5] [=======================================================>--------------------------------------------------------------] 47% est: 1s 
 plot: [3,6] [==========================================================>-----------------------------------------------------------] 50% est: 1s 
 plot: [4,1] [=============================================================>--------------------------------------------------------] 53% est: 1s 
 plot: [4,2] [=================================================================>----------------------------------------------------] 56% est: 1s 
 plot: [4,3] [====================================================================>-------------------------------------------------] 58% est: 1s 
 plot: [4,4] [=======================================================================>----------------------------------------------] 61% est: 1s 
 plot: [4,5] [==========================================================================>-------------------------------------------] 64% est: 1s 
 plot: [4,6] [==============================================================================>---------------------------------------] 67% est: 1s 
 plot: [5,1] [=================================================================================>------------------------------------] 69% est: 1s 
 plot: [5,2] [====================================================================================>---------------------------------] 72% est: 0s 
 plot: [5,3] [=======================================================================================>------------------------------] 75% est: 0s 
 plot: [5,4] [===========================================================================================>--------------------------] 78% est: 0s 
 plot: [5,5] [==============================================================================================>-----------------------] 81% est: 0s 
 plot: [5,6] [=================================================================================================>--------------------] 83% est: 0s 
 plot: [6,1] [=====================================================================================================>----------------] 86% est: 0s 
 plot: [6,2] [========================================================================================================>-------------] 89% est: 0s 
 plot: [6,3] [===========================================================================================================>----------] 92% est: 0s 
 plot: [6,4] [==============================================================================================================>-------] 94% est: 0s 
 plot: [6,5] [==================================================================================================================>---] 97% est: 0s 
 plot: [6,6] [======================================================================================================================]100% est: 0s 
                                                                                                                                                  

# Residual diagnostic panel for the group c (low-HR subset) model.
resid_panel(lm.c_b,plots = 'all')

group c_a

# Group c (high-HR subset) model: after the exclusions only BATTING_HR is left
# as a predictor of TARGET_WINS.
df_c_a_select <- select(
  df_c_a,
  -c(
    INDEX, SO_factor, BATTING_HBP, BASERUN_NET_SB, FIELDING_DP, TOT_AT_BATS,
    TOTAL_BASES, FIELDING_E, PITCHING_HR, BATTING_2B, BATTING_3B, BASERUN_SB,
    BATTING_BB, BATTING_SO, PITCHING_H, PITCHING_SO, DEFENSE_OBP, PITCHING_BB,
    BATTING_H, BASERUN_CS, OFFENSE_OBP
  )
)
lm.c_a <- lm(TARGET_WINS ~ ., data = df_c_a_select)
summary(lm.c_a)

Call:
lm(formula = TARGET_WINS ~ ., data = df_c_a_select)

Residuals:
    Min      1Q  Median      3Q     Max 
-18.042  -5.833  -2.302   7.768  26.223 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 55.04270   11.56426   4.760 7.65e-05 ***
BATTING_HR   0.16455    0.06844   2.404   0.0243 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 11.14 on 24 degrees of freedom
Multiple R-squared:  0.1941,    Adjusted R-squared:  0.1605 
F-statistic: 5.781 on 1 and 24 DF,  p-value: 0.02428
# Wins against home runs for the high-HR subset, with a straight-line fit.
ggplot(data = df_c_a, mapping = aes(x = BATTING_HR, y = TARGET_WINS)) +
  geom_point() +
  geom_smooth(method = 'lm')
`geom_smooth()` using formula 'y ~ x'

Looking at the 26 points with the whole data


# INDEX values of the high-HR subset of group c.
out <- as.vector(df_c_a[["INDEX"]])

# Flag every row of `raw`: 'in' when its INDEX belongs to that subset,
# 'out' otherwise.
df_outlier_factor <- raw %>%
  mutate(OUT_factor = if_else(INDEX %in% out, 'in', 'out'))

Looking at that OUT_factor

# Columns to inspect: the response, the predictors of the group c model
# (lm.c_b), and the two grouping flags.
# The earlier version stored this list in a variable named `c` (masking
# base::c), never used it, and spelled PITCHING_BB as 'PITCHING_SB'. Its
# select() also listed BASERUN_SB twice where PITCHING_BB was presumably
# intended. The list is now named, corrected and actually drives the select().
lm_c_predictors <- c(
  "BASERUN_SB", "PITCHING_BB", "PITCHING_SO", "FIELDING_E", "OFFENSE_OBP"
)
df_c_examine <- df_outlier_factor %>%
  select(TARGET_WINS, all_of(lm_c_predictors), OUT_factor, SO_factor)

# Pairwise plots for group c only, coloured by membership of the high-HR subset.
ggpairs(
  df_c_examine %>% filter(SO_factor == "c"),
  mapping = aes(color = OUT_factor)
)

 plot: [1,1] [=>--------------------------------------------------------------------------------------------------------------------]  2% est: 0s 
 plot: [1,2] [====>-----------------------------------------------------------------------------------------------------------------]  4% est: 1s 
 plot: [1,3] [======>---------------------------------------------------------------------------------------------------------------]  6% est: 2s 
 plot: [1,4] [=========>------------------------------------------------------------------------------------------------------------]  8% est: 2s 
 plot: [1,5] [===========>----------------------------------------------------------------------------------------------------------] 10% est: 2s 
 plot: [1,6] [=============>--------------------------------------------------------------------------------------------------------] 12% est: 2s 
 plot: [1,7] [================>-----------------------------------------------------------------------------------------------------] 14% est: 2s 
 plot: [2,1] [==================>---------------------------------------------------------------------------------------------------] 16% est: 2s 
 plot: [2,2] [=====================>------------------------------------------------------------------------------------------------] 18% est: 2s 
 plot: [2,3] [=======================>----------------------------------------------------------------------------------------------] 20% est: 2s 
 plot: [2,4] [=========================>--------------------------------------------------------------------------------------------] 22% est: 2s 
 plot: [2,5] [============================>-----------------------------------------------------------------------------------------] 24% est: 2s 
 plot: [2,6] [==============================>---------------------------------------------------------------------------------------] 27% est: 2s 
 plot: [2,7] [=================================>------------------------------------------------------------------------------------] 29% est: 2s 
 plot: [3,1] [===================================>----------------------------------------------------------------------------------] 31% est: 2s 
 plot: [3,2] [======================================>-------------------------------------------------------------------------------] 33% est: 2s 
 plot: [3,3] [========================================>-----------------------------------------------------------------------------] 35% est: 2s 
 plot: [3,4] [==========================================>---------------------------------------------------------------------------] 37% est: 2s 
 plot: [3,5] [=============================================>------------------------------------------------------------------------] 39% est: 2s 
 plot: [3,6] [===============================================>----------------------------------------------------------------------] 41% est: 2s 
 plot: [3,7] [==================================================>-------------------------------------------------------------------] 43% est: 2s 
 plot: [4,1] [====================================================>-----------------------------------------------------------------] 45% est: 2s 
 plot: [4,2] [======================================================>---------------------------------------------------------------] 47% est: 2s 
 plot: [4,3] [=========================================================>------------------------------------------------------------] 49% est: 1s 
 plot: [4,4] [===========================================================>----------------------------------------------------------] 51% est: 1s 
 plot: [4,5] [==============================================================>-------------------------------------------------------] 53% est: 1s 
 plot: [4,6] [================================================================>-----------------------------------------------------] 55% est: 1s 
 plot: [4,7] [==================================================================>---------------------------------------------------] 57% est: 1s 
 plot: [5,1] [=====================================================================>------------------------------------------------] 59% est: 1s 
 plot: [5,2] [=======================================================================>----------------------------------------------] 61% est: 1s 
 plot: [5,3] [==========================================================================>-------------------------------------------] 63% est: 1s 
 plot: [5,4] [============================================================================>-----------------------------------------] 65% est: 1s 
 plot: [5,5] [==============================================================================>---------------------------------------] 67% est: 1s 
 plot: [5,6] [=================================================================================>------------------------------------] 69% est: 1s 
 plot: [5,7] [===================================================================================>----------------------------------] 71% est: 1s 
 plot: [6,1] [======================================================================================>-------------------------------] 73% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,2] [========================================================================================>-----------------------------] 76% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,3] [===========================================================================================>--------------------------] 78% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,4] [=============================================================================================>------------------------] 80% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,5] [===============================================================================================>----------------------] 82% est: 1s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [6,6] [==================================================================================================>-------------------] 84% est: 1s 
 plot: [6,7] [====================================================================================================>-----------------] 86% est: 0s 
 plot: [7,1] [=======================================================================================================>--------------] 88% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,2] [=========================================================================================================>------------] 90% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,3] [===========================================================================================================>----------] 92% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,4] [==============================================================================================================>-------] 94% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,5] [================================================================================================================>-----] 96% est: 0s `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

 plot: [7,6] [===================================================================================================================>--] 98% est: 0s 
 plot: [7,7] [======================================================================================================================]100% est: 0s 
                                                                                                                                                  

This group seems to disrupt the model’s predictive ability; its points are clearly outliers within this subgroup, with no predictive variables of their own.

It is also clearly a different group from the SO_factor group.

# Use the low-HR subset model as the group c model in the results table.
lm.c<-lm.c_b

group d

# Rows labelled SO_factor 'd'.
df_d <- filter(raw, SO_factor == 'd')

linear model

# Group d model: TARGET_WINS on BATTING_BB, PITCHING_BB, FIELDING_E,
# FIELDING_DP and OFFENSE_OBP.
# INDEX is used elsewhere in this file as a row identifier and is excluded
# from the general, group a and group c models. It was missing from this
# exclusion list, so lm() fitted it as a predictor; it is now dropped here too.
df_d_select <- df_d %>%
  select(-c(
    INDEX, SO_factor, BATTING_HBP, TOT_AT_BATS, TOTAL_BASES, BASERUN_NET_SB,
    BASERUN_CS, BASERUN_SB, BATTING_3B, BATTING_HR, BATTING_H, BATTING_2B,
    PITCHING_H, PITCHING_HR, PITCHING_SO, BATTING_SO, DEFENSE_OBP
  ))
lm.d <- lm(TARGET_WINS ~ ., data = df_d_select)
summary(lm.d)

Call:
lm(formula = TARGET_WINS ~ ., data = df_d_select)

Residuals:
     Min       1Q   Median       3Q      Max 
-16.0001  -6.4210  -0.4142   5.9837  20.7256 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.660e+01  4.103e+01   0.405 0.687728    
INDEX        2.831e-04  1.728e-03   0.164 0.870583    
BATTING_BB  -2.446e-01  7.764e-02  -3.151 0.002896 ** 
PITCHING_BB  1.582e-01  4.701e-02   3.366 0.001570 ** 
FIELDING_E  -2.858e-01  6.731e-02  -4.246 0.000108 ***
FIELDING_DP -1.460e-01  6.546e-02  -2.231 0.030711 *  
OFFENSE_OBP  4.126e+02  1.478e+02   2.791 0.007673 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 8.864 on 45 degrees of freedom
  (60 observations deleted due to missingness)
Multiple R-squared:  0.5133,    Adjusted R-squared:  0.4484 
F-statistic: 7.911 on 6 and 45 DF,  p-value: 7.546e-06
# Interactive residual diagnostics for the group d model, all plot types.
resid_interact(lm.d,plots = 'all')
Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]) :
  geom_GeomFunction() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issues

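I really want to keep BATTING_HBP in here: it accounts for a large share of R^2, but its p-value is a little too high. What does this mean? (Note: this remark repeats the one under group a; BATTING_HBP is excluded from the group d model above.)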

group e

# Rows labelled SO_factor 'e'.
df_e <- filter(raw, SO_factor == 'e')

linear model

# Group e model: TARGET_WINS on FIELDING_E and OFFENSE_OBP.
# INDEX is used elsewhere in this file as a row identifier and is excluded
# from the general, group a and group c models. It was missing from this
# exclusion list, so lm() fitted it as a predictor; it is now dropped here too.
df_e_select <- df_e %>%
  select(-c(
    INDEX, SO_factor, BATTING_HBP, BASERUN_NET_SB, FIELDING_DP, TOT_AT_BATS,
    TOTAL_BASES, BATTING_2B, BATTING_3B, BATTING_H, BATTING_BB, PITCHING_H,
    PITCHING_BB, DEFENSE_OBP, PITCHING_SO, BASERUN_CS, BASERUN_SB, BATTING_HR,
    BATTING_SO, PITCHING_HR
  ))
lm.e <- lm(TARGET_WINS ~ ., data = df_e_select)
summary(lm.e)

Call:
lm(formula = TARGET_WINS ~ ., data = df_e_select)

Residuals:
    Min      1Q  Median      3Q     Max 
-37.586 -13.003   3.494  11.032  40.833 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.833e+01  1.653e+01  -1.109    0.271    
INDEX       -4.707e-04  2.256e-03  -0.209    0.835    
FIELDING_E  -6.152e-02  6.477e-03  -9.499 7.34e-15 ***
OFFENSE_OBP  5.210e+02  5.187e+01  10.045 6.08e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 16.82 on 82 degrees of freedom
Multiple R-squared:  0.687, Adjusted R-squared:  0.6755 
F-statistic: 59.99 on 3 and 82 DF,  p-value: < 2.2e-16
# Pairwise plot matrix of the columns used to fit lm.e.
ggpairs(df_e_select)

 plot: [1,1] [======>---------------------------------------------------------------------------------------------------------------]  6% est: 0s 
 plot: [1,2] [==============>-------------------------------------------------------------------------------------------------------] 12% est: 0s 
 plot: [1,3] [=====================>------------------------------------------------------------------------------------------------] 19% est: 0s 
 plot: [1,4] [=============================>----------------------------------------------------------------------------------------] 25% est: 0s 
 plot: [2,1] [====================================>---------------------------------------------------------------------------------] 31% est: 0s 
 plot: [2,2] [===========================================>--------------------------------------------------------------------------] 38% est: 0s 
 plot: [2,3] [===================================================>------------------------------------------------------------------] 44% est: 0s 
 plot: [2,4] [==========================================================>-----------------------------------------------------------] 50% est: 0s 
 plot: [3,1] [=================================================================>----------------------------------------------------] 56% est: 0s 
 plot: [3,2] [=========================================================================>--------------------------------------------] 62% est: 0s 
 plot: [3,3] [================================================================================>-------------------------------------] 69% est: 0s 
 plot: [3,4] [=======================================================================================>------------------------------] 75% est: 0s 
 plot: [4,1] [===============================================================================================>----------------------] 81% est: 0s 
 plot: [4,2] [======================================================================================================>---------------] 88% est: 0s 
 plot: [4,3] [==============================================================================================================>-------] 94% est: 0s 
 plot: [4,4] [======================================================================================================================]100% est: 0s 
                                                                                                                                                  

# Interactive residual diagnostics for the group e model, all plot types.
resid_interact(lm.e,plots = 'all')
Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]) :
  geom_GeomFunction() has yet to be implemented in plotly.
  If you'd like to see this geom implemented,
  Please open an issue with your example code at
  https://github.com/ropensci/plotly/issues

Model results

# Adjusted R-squared of the general model and of each SO_factor group model.
r_raw <- summary(lm.raw)[["adj.r.squared"]]
r_a <- summary(lm.a)[["adj.r.squared"]]
r_b <- summary(lm.b)[["adj.r.squared"]]
r_c <- summary(lm.c)[["adj.r.squared"]]
r_d <- summary(lm.d)[["adj.r.squared"]]
r_e <- summary(lm.e)[["adj.r.squared"]]

# Round to two decimals and pair each value with its model label.
r_adj <- round(c(r_raw, r_a, r_b, r_c, r_d, r_e), digits = 2)
model_names <- c('general', "group a", "group b", "group c", "group d", "group e")

# Printed comparison table, one row per model.
data.frame(model_names = model_names, r_adj = r_adj)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCkxPQUQgUEFDS0FHRVMNCg0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShHR2FsbHkpDQpsaWJyYXJ5KHBzeWNoKQ0KbGlicmFyeShzdGF0cykNCmxpYnJhcnkoY29ycnBsb3QpDQoNCnNldC5zZWVkKDIxMDkwNCkNCg0KcmF3IDwtIHJlYWQuY3N2KCIuL2RhdGEvbW9uZXliYWxsLXRyYWluaW5nLWRhdGEuY3N2IikNCmBgYA0KDQoNClRJRFkgREFUQQ0KDQpgYGB7cn0NCnJhdzwtcmF3JT4lZmlsdGVyKFRFQU1fUElUQ0hJTkdfU08+MCkNCnJhdzwtcmF3ICU+JQ0KICBtdXRhdGUoU09fZmFjdG9yID0gY2FzZV93aGVuKFRFQU1fQkFUVElOR19TTyA+PSBURUFNX1BJVENISU5HX1NPKi45NisxMH4gJ2EnLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgKFRFQU1fQkFUVElOR19TTzxURUFNX1BJVENISU5HX1NPKi45NisxMCAmIFRFQU1fQkFUVElOR19TTz5URUFNX1BJVENISU5HX1NPKi45Ni01MCkgfidiJywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIChURUFNX0JBVFRJTkdfU088VEVBTV9QSVRDSElOR19TTyouOTYtNTAgJiBURUFNX0JBVFRJTkdfU08+VEVBTV9QSVRDSElOR19TTyouNzUtMCkgfidjJywNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIFRFQU1fQkFUVElOR19TTzxURUFNX1BJVENISU5HX1NPKi43NSAmIFRFQU1fQkFUVElOR19TTz5URUFNX1BJVENISU5HX1NPKi41NSB+J2QnLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgVEVBTV9CQVRUSU5HX1NPPFRFQU1fUElUQ0hJTkdfU08qLjU1fidlJykpIA0KYGBgDQoNCg0KDQpgYGB7cn0NCmdncGxvdChkYXRhPXJhdyU+JWZpbHRlcihURUFNX1BJVENISU5HX1NPPDIwMDApLCBhZXMoeCA9IFRFQU1fUElUQ0hJTkdfU08sIHkgPSBURUFNX0JBVFRJTkdfU08sIGNvbG91ciA9IFNPX2ZhY3RvcikpICsNCiAgZ2VvbV9wb2ludCgpK2dlb21fYWJsaW5lKHNsb3BlPS41NSxpbnRlcmNlcHQ9MCkNCmBgYA0KDQoNCmBgYHtyfQ0KbmFtZXMocmF3KSA8LSBnc3ViKCdURUFNXycsICcnLCB4ID0gbmFtZXMocmF3KSkNCmBgYA0KDQpBZnRlciBncm91cGluZyB0aGUgZGF0YSBhbG9uZyB0aGVzZSBwYXJhbWV0ZXJzLCB3ZSBjYW4gc2VlIGZyb20gb25lIGNhdGVnb3J5IHRoYXQgdGhlcmUgYXJlIGRpZmZlcmVuY2VzIGluIHRoZSBzbG9wZXMgYW5kIGludGVyY2VwdHMgZGVwZW5kaW5nIG9uIHdoaWNoIGNhdGVnb3J5IHRoZXkgZmFsbCBpbg0KDQpgYGB7cn0NCnJhdzwtcmF3JT4lbXV0YXRlKCJCQVNFUlVOX05FVF9TQiIgPSBCQVNFUlVOX1NCIC0gQkFTRVJVTl9DUykgJT4lDQogIG11dGF0ZSgiT0ZGRU5TRV9PQlAiID0gKEJBVFRJTkdfSCArIEJBVFRJTkdfQkIpLyhCQVRUSU5HX0ggKyBCQVRUSU5HX0JCICsoMTYyKjI3KSApKSAlPiUNCiAgbXV0YXRlKCJERUZFTlNFX09CUCIgPSAoUElUQ0hJTkdfSCArIFBJVENISU5HX0JC
ICkvKFBJVENISU5HX0ggICsgUElUQ0hJTkdfQkIgKzE2MioyNyApKSAlPiUNCiAgbXV0YXRlKCJUT1RfQVRfQkFUUyIgPSBCQVRUSU5HX0ggKyBCQVRUSU5HX0JCICApDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QocmF3LCBhZXMoeD1CQVRUSU5HX0gseT1UQVJHRVRfV0lOUywgY29sb3I9U09fZmFjdG9yKSkrZ2VvbV9wb2ludCgpK2dlb21fc21vb3RoKG1ldGhvZD0nbG0nLCBzZT1GQUxTRSkNCmBgYA0KDQpgYGB7cn0NCnJhdyA8LSByYXcgJT4lDQogIG11dGF0ZSgiVE9UQUxfQkFTRVMiID0gQkFUVElOR19IICsgQkFUVElOR19CQiApDQpgYGANCg0KYGBge3J9DQpkZl90ZW1wPC1yYXclPiVzZWxlY3QoLWMoQkFUVElOR19IQlAsUElUQ0hJTkdfU08sUElUQ0hJTkdfSCxQSVRDSElOR19IUixQSVRDSElOR19CQikpDQoNCg0KI2dncGFpcnMoZGZfdGVtcCwgbWFwcGluZz1hZXMoY29sb3I9U09fZmFjdG9yKSkNCmBgYA0KDQpHRU5FUkFMIE1PREVMDQoNCmBgYHtyfQ0KcmF3X3NlbGVjdDwtcmF3JT4lc2VsZWN0KC1jKElOREVYLFNPX2ZhY3RvcixUT1RfQVRfQkFUUyxUT1RBTF9CQVNFUyxCQVNFUlVOX05FVF9TQixQSVRDSElOR19IUixCQVRUSU5HX0gsQkFUVElOR18yQixCQVRUSU5HX1NPLFBJVENISU5HX0gsQkFUVElOR18zQixCQVNFUlVOX1NCLEJBVFRJTkdfSEJQLEJBVFRJTkdfQkIsREVGRU5TRV9PQlApKQ0KI3Jhd19zZWxlY3Q8LXJhd19zZWxlY3QlPiVtdXRhdGUoT0ZGRU5TRV9PQlA9bG9nKE9GRkVOU0VfT0JQKSkNCnN1bW1hcnkobG0ucmF3PC1sbShUQVJHRVRfV0lOU34uLGRhdGE9cmF3X3NlbGVjdCkpDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KGdnUmVzaWRwYW5lbCkNCnJlc2lkX3BhbmVsKGxtLnJhdykNCmBgYA0KDQpNb2RlbHMgYnkgU09fZmFjdG9yDQoNCmdyb3VwIGENCg0KDQpgYGB7cn0NCmRmX2E8LXJhdyU+JWZpbHRlcihTT19mYWN0b3I9PSdhJykNCg0KYGBgDQoNCmxpbmVhciBtb2RlbA0KYGBge3J9DQpkZl9hX3NlbGVjdDwtZGZfYSU+JXNlbGVjdCgtYyhJTkRFWCxTT19mYWN0b3IsUElUQ0hJTkdfSFIsUElUQ0hJTkdfU08sQkFTRVJVTl9DUyxCQVRUSU5HXzJCLFBJVENISU5HX0gsUElUQ0hJTkdfQkIsQkFTRVJVTl9TQixCQVRUSU5HXzNCLFRPVF9BVF9CQVRTLEJBVFRJTkdfQkIsQkFUVElOR19ILFRPVEFMX0JBU0VTLE9GRkVOU0VfT0JQLEJBU0VSVU5fTkVUX1NCLEZJRUxESU5HX0RQKSkNCnN1bW1hcnkobG0uYTwtbG0oVEFSR0VUX1dJTlN+LixkYXRhPWRmX2Ffc2VsZWN0KSkNCmBgYA0KDQpgYGB7cn0NCnJlc2lkX2ludGVyYWN0KGxtLmEscGxvdHMgPSAnYWxsJykNCmBgYA0KDQpJIHJlYWxseSB3YW50IHRvIGtlZXAgQkFUVElOR19IQlAgSU4gSEVSRSwgaXQgYWNjb3VudHMgZm9yIHNvIG11Y2ggUl4yIGJ1dCB0aGUgcCB2YWx1ZSBpcyBhIGxpdHRsZSB0b28gaGlnaC4gd2hhdCBkb2VzIHRoaXMgbWVhbj8NCg0KDQoNCmdyb3VwIGINCmBgYHtyfQ0KZGZfYjwtcmF3JT4lZmlsdGVyKFNPX2ZhY3Rvcj09
J2InKQ0KDQpgYGANCg0KbGluZWFyIG1vZGVsDQpgYGB7cn0NCmRmX2Jfc2VsZWN0PC1kZl9iJT4lc2VsZWN0KC1jKFNPX2ZhY3RvcixCQVRUSU5HX0hCUCxUT1RfQVRfQkFUUyxUT1RBTF9CQVNFUyxCQVNFUlVOX05FVF9TQixQSVRDSElOR19TTyxQSVRDSElOR19CQixCQVRUSU5HX1NPLEJBU0VSVU5fU0IsUElUQ0hJTkdfU08sUElUQ0hJTkdfQkIsUElUQ0hJTkdfU08sQkFUVElOR19CQixCQVRUSU5HX1NPLFBJVENISU5HX0hSLEZJRUxESU5HX0RQLEJBVFRJTkdfMkIsT0ZGRU5TRV9PQlAsUElUQ0hJTkdfSCxCQVRUSU5HX0gpKQ0Kc3VtbWFyeShsbS5iPC1sbShUQVJHRVRfV0lOU34uLGRhdGE9ZGZfYl9zZWxlY3QpKQ0KYGBgDQoNCmBgYHtyfQ0KcmVzaWRfcGFuZWwobG0uYixwbG90cyA9ICdhbGwnKQ0KYGBgDQoNCg0KDQoNCg0KZ3JvdXAgYw0KDQoNCmBgYHtyfQ0KZGZfYzwtcmF3JT4lZmlsdGVyKFNPX2ZhY3Rvcj09J2MnKQ0KDQpgYGANCg0KYGBge3J9DQpnZ3Bsb3QoZGZfYywgYWVzKHk9VEFSR0VUX1dJTlMsIHg9QkFUVElOR19IUikpK2dlb21fcG9pbnQoKQ0KYGBgDQpzcGxpdHRpbmcgZGZfYyBieSBiYXR0aW5nIEhSDQoNCmBgYHtyfQ0KZGZfY19hPC1kZl9jJT4lZmlsdGVyKEJBVFRJTkdfSFI+MTEwKQ0KZGZfY19iPC1kZl9jJT4lZmlsdGVyKEJBVFRJTkdfSFI8MTAwKQ0KYGBgDQoNCklGIFlPVSBTUExJVCBPVVQgVEhFU0UgMjggUE9JTlRTIERGX0MgQ09NUEFSRVMgV0VMTA0KDQoNCmxpbmVhciBtb2RlbA0KYGBge3J9DQpkZl9jX2Jfc2VsZWN0PC1kZl9jX2IlPiVzZWxlY3QoLWMoSU5ERVgsU09fZmFjdG9yLEJBVFRJTkdfSEJQLEJBU0VSVU5fTkVUX1NCLEZJRUxESU5HX0RQLEJBU0VSVU5fQ1MsIEJBVFRJTkdfSCxCQVRUSU5HXzJCLEJBVFRJTkdfQkIsVE9UX0FUX0JBVFMsVE9UQUxfQkFTRVMsREVGRU5TRV9PQlAsQkFUVElOR19TTyxCQVRUSU5HX0hSLFBJVENISU5HX0gsQkFUVElOR18zQixQSVRDSElOR19IUikpDQpzdW1tYXJ5KGxtLmNfYjwtbG0oVEFSR0VUX1dJTlN+LixkYXRhPWRmX2NfYl9zZWxlY3QpKQ0KYGBgDQoNCmBgYHtyfQ0KZ2dwYWlycyhkZl9jX2Jfc2VsZWN0KQ0KYGBgDQoNCg0KYGBge3J9DQpyZXNpZF9wYW5lbChsbS5jX2IscGxvdHMgPSAnYWxsJykNCmBgYA0KDQoNCmdyb3VwIGNfYQ0KDQpgYGB7cn0NCmRmX2NfYV9zZWxlY3Q8LWRmX2NfYSU+JXNlbGVjdCgtYyhJTkRFWCxTT19mYWN0b3IsQkFUVElOR19IQlAsQkFTRVJVTl9ORVRfU0IsRklFTERJTkdfRFAsVE9UX0FUX0JBVFMsVE9UQUxfQkFTRVMsRklFTERJTkdfRSxQSVRDSElOR19IUixCQVRUSU5HXzJCLEJBVFRJTkdfM0IsQkFTRVJVTl9TQixCQVRUSU5HX0JCLEJBVFRJTkdfU08sUElUQ0hJTkdfSCxQSVRDSElOR19TTyxERUZFTlNFX09CUCxQSVRDSElOR19CQixCQVRUSU5HX0gsQkFTRVJVTl9DUyxPRkZFTlNFX09CUCkpDQpzdW1tYXJ5KGxtLmNfYTwtbG0oVEFSR0VUX1dJTlN+LixkYXRhPWRmX2NfYV9zZWxlY3QpKQ0KYGBgDQpg
YGB7cn0NCmdncGxvdChkZl9jX2EsYWVzKHg9QkFUVElOR19IUix5PVRBUkdFVF9XSU5TKSkrZ2VvbV9wb2ludCgpK2dlb21fc21vb3RoKG1ldGhvZD0nbG0nKQ0KYGBgDQoNCkxvb2tpbmcgYXQgdGhlIDI2IHBvaW50cyB3aXRoIHRoZSB3aG9sZSBkYXRhDQoNCmBgYHtyfQ0KDQpvdXQ8LWFzLnZlY3RvcihkZl9jX2EkSU5ERVgpDQpkZl9vdXRsaWVyX2ZhY3RvcjwtcmF3JT4lbXV0YXRlKE9VVF9mYWN0b3I9Y2FzZV93aGVuKA0KICAoSU5ERVglaW4lb3V0KX4naW4nLA0KICBUUlVFfidvdXQnDQogICkNCikNCmBgYA0KDQoNCg0KbG9va2luZyBvdXQgdGhhdCBvdXQgZmFjdG9yDQoNCmBgYHtyfQ0KYzwtYygnQkFTRVJVTl9TQicsJ1BJVENISU5HX1NCJywnUElUQ0hJTkdfU08nLCdGSUVMRElOR19FJywnT0ZGRU5TRV9PQlAnKQ0KZGZfY19leGFtaW5lPC1kZl9vdXRsaWVyX2ZhY3RvciU+JXNlbGVjdChjKFRBUkdFVF9XSU5TLEJBU0VSVU5fU0IsQkFTRVJVTl9TQixQSVRDSElOR19TTyxGSUVMRElOR19FLE9GRkVOU0VfT0JQLE9VVF9mYWN0b3IsU09fZmFjdG9yKSkNCmdncGFpcnMoZGZfY19leGFtaW5lJT4lZmlsdGVyKFNPX2ZhY3Rvcj09J2MnKSwgbWFwcGluZz1hZXMoY29sb3I9T1VUX2ZhY3RvcikpDQpgYGANCg0KDQp0aGlzIGdyb3VwIHNlZW1zIHRvIGRpc3J1cHQgdGhlIG1vZGVsJ3MgcHJlZGljdGl2ZSBhYmlsaXR5ICBhbmQgYXJlIGNsZWFybHkgb3V0bGllcnMgb2YgdGhpcyBzdWJncm91cCB3aXRoIG5vIHByZWRpY3RpdmUgdmFyaWFibGVzIG9mIGl0cyBvd24uIA0KDQpJdCBhbHNvIGlzIGNsZWFybHkgYSBkaWZmZXJudCBncm91cCBmcm9tIHRoZSBTT19mYWN0b3INCg0KYGBge3J9DQpsbS5jPC1sbS5jX2INCmBgYA0KDQoNCmdyb3VwIGQNCg0KDQpgYGB7cn0NCmRmX2Q8LXJhdyU+JWZpbHRlcihTT19mYWN0b3I9PSdkJykNCg0KYGBgDQoNCmxpbmVhciBtb2RlbA0KYGBge3J9DQpkZl9kX3NlbGVjdDwtZGZfZCU+JXNlbGVjdCgtYyhTT19mYWN0b3IsQkFUVElOR19IQlAsVE9UX0FUX0JBVFMsVE9UQUxfQkFTRVMsQkFTRVJVTl9ORVRfU0IsQkFTRVJVTl9DUyxCQVNFUlVOX1NCLEJBVFRJTkdfM0IsQkFUVElOR19IUixCQVRUSU5HX0gsQkFUVElOR18yQixQSVRDSElOR19ILFBJVENISU5HX0hSLFBJVENISU5HX1NPLEJBVFRJTkdfU08sREVGRU5TRV9PQlApKQ0Kc3VtbWFyeShsbS5kPC1sbShUQVJHRVRfV0lOU34uLGRhdGE9ZGZfZF9zZWxlY3QpKQ0KYGBgDQoNCmBgYHtyfQ0KcmVzaWRfaW50ZXJhY3QobG0uZCxwbG90cyA9ICdhbGwnKQ0KYGBgDQoNCkkgcmVhbGx5IHdhbnQgdG8ga2VlcCBCQVRUSU5HX0hCUCBJTiBIRVJFLCBpdCBhY2NvdW50cyBmb3Igc28gbXVjaCBSXjIgYnV0IHRoZSBwIHZhbHVlIGlzIGEgbGl0dGxlIHRvbyBoaWdoLiB3aGF0IGRvZXMgdGhpcyBtZWFuPw0KDQoNCg0KDQoNCmdyb3VwIGUNCg0KDQpgYGB7cn0NCmRmX2U8LXJhdyU+JWZpbHRlcihTT19mYWN0b3I9PSdlJykNCg0KYGBgDQoNCmxpbmVh
ciBtb2RlbA0KYGBge3J9DQpkZl9lX3NlbGVjdDwtZGZfZSU+JXNlbGVjdCgtYyhTT19mYWN0b3IsQkFUVElOR19IQlAsQkFTRVJVTl9ORVRfU0IsRklFTERJTkdfRFAsVE9UX0FUX0JBVFMsVE9UQUxfQkFTRVMsQkFUVElOR18yQixCQVRUSU5HXzNCLEJBVFRJTkdfSCxCQVRUSU5HX0JCLFBJVENISU5HX0gsUElUQ0hJTkdfQkIsREVGRU5TRV9PQlAsUElUQ0hJTkdfU08sQkFTRVJVTl9DUyxCQVNFUlVOX1NCLEJBVFRJTkdfSFIsQkFUVElOR19TTyxQSVRDSElOR19IUikpDQpzdW1tYXJ5KGxtLmU8LWxtKFRBUkdFVF9XSU5Tfi4sZGF0YT1kZl9lX3NlbGVjdCkpDQpgYGANCg0KYGBge3J9DQpnZ3BhaXJzKGRmX2Vfc2VsZWN0KQ0KYGBgDQoNCg0KYGBge3J9DQpyZXNpZF9pbnRlcmFjdChsbS5lLHBsb3RzID0gJ2FsbCcpDQpgYGANCg0KDQoNCk1vZGVsIHJlc3VsdHMNCg0KYGBge3J9DQpyX3Jhdzwtc3VtbWFyeShsbS5yYXcpJGFkai5yLnNxdWFyZWQNCnJfYTwtc3VtbWFyeShsbS5hKSRhZGouci5zcXVhcmVkDQpyX2I8LXN1bW1hcnkobG0uYikkYWRqLnIuc3F1YXJlZA0Kcl9jPC1zdW1tYXJ5KGxtLmMpJGFkai5yLnNxdWFyZWQNCnJfZDwtc3VtbWFyeShsbS5kKSRhZGouci5zcXVhcmVkDQpyX2U8LXN1bW1hcnkobG0uZSkkYWRqLnIuc3F1YXJlZA0KDQpyX2Fkajwtcm91bmQoYyhyX3JhdyxyX2Escl9iLHJfYyxyX2Qscl9lKSwyKQ0KbW9kZWxfbmFtZXM8LWMoJ2dlbmVyYWwnLCJncm91cCBhIiwiZ3JvdXAgYiIsImdyb3VwIGMiLCJncm91cCBkIiwiZ3JvdXAgZSIpDQoNCmRhdGEuZnJhbWUobW9kZWxfbmFtZXMscl9hZGopDQpgYGANCg0K